1/text: Text Files. @Purpose: To read text files of whatever flavour, one line at a time. @Interface: -- Owns struct text_file_position (private) @Definitions: @ @c typedef struct text_file_position { char text_file_filename[MAX_FILENAME_LENGTH]; int line_count; int line_position; int skip_terminator; int actively_scanning; /* whether we are still interested in the rest of the file */ } text_file_position; @------------------------------------------------------------------------------- @p Text file positions. This is useful for error messages: @c /**/ void describe_file_position(char *t, text_file_position *tfp) { *t = 0; if (tfp == NULL) return; sprintf(t, "%s, line %d: ", tfp->text_file_filename, tfp->line_count); } @ @c /**/ int tfp_get_line_count(text_file_position *tfp) { if (tfp == NULL) return 0; return tfp->line_count; } @ @c /**/ void tfp_lose_interest(text_file_position *tfp) { tfp->actively_scanning = FALSE; } @p Error messages. |cBlorb| is only minimally helpful when diagnosing problems, because it's intended to be used as the back end of a system which only generates correct blurb files, so that everything will work -- ideally, the Inform user will never know that |cBlorb| exists. @c text_file_position *error_position = NULL; /**/ void set_error_position(text_file_position *tfp) { error_position = tfp; } /**/ void error(char *erm) { char err[MAX_FILENAME_LENGTH]; describe_file_position(err, error_position); sprintf(err+strlen(err), "Error: %s\n", erm); spool_error(err); } /**/ void error_1(char *erm, char *s) { char err[MAX_FILENAME_LENGTH]; describe_file_position(err, error_position); sprintf(err+strlen(err), "Error: %s: '%s'\n", erm, s); spool_error(err); } /**/ void errorf_1s(char *erm, char *s1) { char err[MAX_FILENAME_LENGTH]; sprintf(err, erm, s1); spool_error(err); } /**/ void errorf_2s(char *erm, char *s1, char *s2) { char err[MAX_FILENAME_LENGTH]; sprintf(err, erm, s1, s2); spool_error(err); } /**/ void fatal(char *erm) { char err[MAX_FILENAME_LENGTH]; describe_file_position(err, error_position); sprintf(err+strlen(err), "Fatal error: %s\n", erm); spool_error(err); print_report(); exit(1); } /**/ void fatal_fs(char *erm, char *fn) { char err[MAX_FILENAME_LENGTH]; describe_file_position(err, error_position); sprintf(err+strlen(err), "Fatal error: %s: filename '%s'\n", erm, fn); spool_error(err); print_report(); exit(1); } /**/ void warning_fs(char *erm, char *fn) { char err[MAX_FILENAME_LENGTH]; describe_file_position(err, error_position); fprintf(stderr, "%sWarning: %s: filename '%s'\n", err, erm, fn); } @ Errors are spooled to a placeholder, for the benefit of the report: @c void spool_error(char *err) { append_to_placeholder("CBLORBERRORS", "
  • "); append_to_placeholder("CBLORBERRORS", err); append_to_placeholder("CBLORBERRORS", "
  • "); fprintf(stderr, "%s", err); error_count++; } @p File handling. We read lines in, delimited by any of the standard line-ending characters, and send them one at a time to a function called |iterator|. @c /**/ void file_read(char *filename, char *message, int serious, void (iterator)(char *, text_file_position *), text_file_position *start_at) { FILE *HANDLE; text_file_position tfp; @; @; @; fclose(HANDLE); } @ @ = if (strlen(filename) >= MAX_FILENAME_LENGTH) { if (serious) fatal_fs("filename too long", filename); error_1("filename too long", filename); return; } HANDLE = fopen(filename, "rb"); if (HANDLE == NULL) { if (message == NULL) return; if (serious) fatal_fs(message, filename); else { error_1(message, filename); return; } } @ The ANSI definition of |ftell| and |fseek| says that, with text files, the only definite position value is 0 -- meaning the beginning of the file -- and this is what we initialise |line_position| to. We must otherwise only write values returned by |ftell| into this field. @ = if (start_at == NULL) { tfp.line_count = 1; tfp.line_position = 0; tfp.skip_terminator = 'X'; } else { tfp = *start_at; if (fseek(HANDLE, (long int) (tfp.line_position), SEEK_SET)) { if (serious) fatal_fs("unable to seek position in file", filename); error_1("unable to seek position in file", filename); return; } } tfp.actively_scanning = TRUE; strcpy(tfp.text_file_filename, filename); @ We aim to get this right whether the lines are terminated by |0A|, |0D|, |0A 0D| or |0D 0A|. The final line is not required to be terminated. @ = char line[MAX_TEXT_FILE_LINE_LENGTH+1]; int i = 0, c = ' '; int warned = FALSE; while ((c != EOF) && (tfp.actively_scanning)) { c = fgetc(HANDLE); if ((c == EOF) || (c == '\x0a') || (c == '\x0d')) { line[i] = 0; if ((i > 0) || (c != tfp.skip_terminator)) { @; if (c == '\x0a') tfp.skip_terminator = '\x0d'; if (c == '\x0d') tfp.skip_terminator = '\x0a'; } else tfp.skip_terminator = 'X'; @; i = 0; } else { if (i < MAX_TEXT_FILE_LINE_LENGTH) line[i++] = (char) c; else { if (serious) fatal_fs("line too long", filename); if (warned == FALSE) { warning_fs("line too long (truncating it)", filename); warned = TRUE; } } } } if ((i > 0) && (tfp.actively_scanning)) @; @ We update the line counter only when a line is actually sent: @ = iterator(line, &tfp); tfp.line_count++; @ But we update the text file position after every apparent line terminator. This is because we might otherwise, on a Windows text file, end up with an |ftell| position in between the |CR| and the |LF|; if we resume at that point, later on, we'll then have an off-by-one error in the line numbering in the resumption as compared to during the original pass. Properly speaking, |ftell| returns a long |int|, not an |int|, but on a 32-bit integer machine -- which Inform requires -- this gives us room for files to run to 2GB. Text files seldom come that large. @ = tfp.line_position = (int) (ftell(HANDLE)); if (tfp.line_position == -1) { if (serious) fatal_fs("unable to determine position in file", filename); error_1("unable to determine position in file", filename); } @p Two string utilities. @c /**/ char *trim_white_space(char *original) { int i; for (i=0; white_space(original[i]); i++) ; original += i; for (i=strlen(original)-1; ((i>=0) && (white_space(original[i]))); i--) original[i] = 0; return original; } @ @c /**/ void extract_word(char *fword, char *line, int size, int word) { int i = 0; fword[0] = 0; while (word > 0) { word--; while (white_space(line[i])) i++; int j = 0; while ((line[i]) && (!white_space(line[i]))) { if (j < size-1) fword[j++] = tolower(line[i]); i++; } fword[j] = 0; if (line[i] == 0) break; } if (word > 0) fword[0] = 0; } @ Where we define white space as spaces and tabs only: @c int white_space(int c) { if ((c == ' ') || (c == '\t')) return TRUE; return FALSE; } @p Other file utilities. Although this section is called ``Text Files'', it also has a couple of general-purpose file utilities: @c /**/ char *get_filename_extension(char *filename) { int i = strlen(filename) - 1; while ((i>=0) && (filename[i] != '.') && (filename[i] != SEP_CHAR)) i--; if ((i<0) || (filename[i] == SEP_CHAR)) return filename + strlen(filename); return filename + i; } /**/ char *get_filename_leafname(char *filename) { int i = strlen(filename) - 1; while ((i>=0) && (filename[i] != SEP_CHAR)) i--; return filename + i + 1; } /**/ int file_exists(char *filename) { FILE *TEST = fopen(filename, "r"); if (TEST) { fclose(TEST); return TRUE; } return FALSE; } /**/ long int file_size(char *filename) { FILE *TEST_FILE = fopen(filename, "rb"); if (TEST_FILE) { if (fseek(TEST_FILE, 0, SEEK_END) == 0) { long int file_size = ftell(TEST_FILE); if (file_size == -1L) fatal_fs("ftell failed on linked file", filename); fclose(TEST_FILE); return file_size; } else fatal_fs("fseek failed on linked file", filename); fclose(TEST_FILE); } return -1L; } /**/ int copy_file(char *from, char *to, int suppress_error) { if ((from == NULL) || (to == NULL) || (strcmp(from, to) == 0)) fatal("files confused in copier"); FILE *FROM = fopen(from, "rb"); if (FROM == NULL) { if (suppress_error == FALSE) fatal_fs("unable to read file", from); return -1; } FILE *TO = fopen(to, "wb"); if (TO == NULL) { fatal_fs("unable to write to file", to); return -1; } int size = 0; while (TRUE) { int c = fgetc(FROM); if (c == EOF) break; size++; putc(c, TO); } fclose(FROM); fclose(TO); return size; }